Segment Size

##Segment Size##
cat('Propotion of each segment:  \n')
## Propotion of each segment:
print(table(myData$segment))
## 
##    1    2    3    4    5    6    7 
## 1082  543  488  496 1199  637  523
print(round(table(myData$segment)/nrow(myData),digit=3))
## 
##     1     2     3     4     5     6     7 
## 0.218 0.109 0.098 0.100 0.241 0.128 0.105

seg1

########
##seg1##
########
seg1rate <- nrow(seg1)/nrow(myData)
##Run a roop to compare rate( attribute rate / ovservation rate)##
compare_rate <- list()
need_mannual <- c()

for(i in colnames(seg1)){
  if(length(table(seg1[,i]))==length(table(myData[,i]))){
    compare_rate[i]<-list(value=(round((table(seg1[i])/table(myData[i]))/seg1rate,digit=3)))}
  else{
    need_mannual <- append(need_mannual,i)

  }
}

cat('seg1 compare to total: \n')
## seg1 compare to total:
print(compare_rate)
## $jan
## 
##     0     1 
## 1.025 0.837 
## 
## $feb
## 
##     0     1 
## 1.025 0.852 
## 
## $mar
## 
##     0     1 
## 1.024 0.880 
## 
## $apr
## 
##     0     1 
## 1.036 0.805 
## 
## $may
## 
##     0     1 
## 1.021 0.902 
## 
## $jun
## 
##     0     1 
## 1.021 0.881 
## 
## $jul
## 
##     0     1 
## 0.980 1.108 
## 
## $aug
## 
##     0     1 
## 0.970 1.097 
## 
## $sep
## 
##     0     1 
## 0.980 1.101 
## 
## $oct
## 
##     0     1 
## 1.014 0.889 
## 
## $nov
## 
##     0     1 
## 1.002 0.975 
## 
## $dec
## 
##     0     1 
## 1.011 0.904 
## 
## $recency
## 
##     0     1     2     3     4     5     6     7     8     9    10    11 
## 0.849 1.179 1.216 1.246 0.790 0.927 0.783 0.944 0.873 0.889 1.007 0.955 
##    12    13    14    15    16    17    18    19    20    21 
## 1.099 1.198 1.069 1.168 0.852 1.100 0.723 0.620 0.889 0.758 
## 
## $tenure
## 
##     0     1     2     3     4     5     6     7     8     9    10    11 
## 1.148 1.424 1.505 1.530 0.782 1.113 0.918 1.023 1.216 0.795 1.030 0.956 
##    12    13    14    15    16    17    18    19    20    21 
## 1.078 1.082 0.985 1.178 0.944 0.877 0.760 0.840 0.784 0.770 
## 
## $retained_flag
## 
##     0     1 
## 1.052 0.910 
## 
## $ever_responded
## 
##    0    1 
## 1.05 0.90 
## 
## $man_dept_buy
## 
##     0     1 
## 1.022 0.919 
## 
## $womens_dept_buy
## 
##     0     1 
## 1.020 0.982 
## 
## $kids_dept_buy
## 
##     0     1 
## 1.001 0.993 
## 
## $athletic_dept_buy
## 
##     0     1 
## 0.962 1.026 
## 
## $accessories_dept_buy
## 
##     0     1 
## 1.003 0.993
cat('filtered, +/- 10%: \n') 
## filtered, +/- 10%:
print(unlist(compare_rate)[unlist(compare_rate)>1.1|unlist(compare_rate)<0.9]-1)
##      jan.1      feb.1      mar.1      apr.1      jun.1      jul.1 
##     -0.163     -0.148     -0.120     -0.195     -0.119      0.108 
##      sep.1      oct.1  recency.0  recency.1  recency.2  recency.3 
##      0.101     -0.111     -0.151      0.179      0.216      0.246 
##  recency.4  recency.6  recency.8  recency.9 recency.13 recency.15 
##     -0.210     -0.217     -0.127     -0.111      0.198      0.168 
## recency.16 recency.18 recency.19 recency.20 recency.21   tenure.0 
##     -0.148     -0.277     -0.380     -0.111     -0.242      0.148 
##   tenure.1   tenure.2   tenure.3   tenure.4   tenure.5   tenure.8 
##      0.424      0.505      0.530     -0.218      0.113      0.216 
##   tenure.9  tenure.15  tenure.17  tenure.18  tenure.19  tenure.20 
##     -0.205      0.178     -0.123     -0.240     -0.160     -0.216 
##  tenure.21 
##     -0.230
################################
##Plot the recency abnormality##
################################
#since the recency is fluturated, I plot it

recency_abnorm <- data.frame(time=seq(0,21),abnorm=(unlist(compare_rate['recency'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
  geom_line(col="royalblue3")+
  geom_hline(yintercept=0,col='firebrick3')+
  geom_point(col="royalblue3",alpha=0.75)+
  labs(y='+- % than\nthis segment should be', title='recency fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################
##Plot the tenure abnormality##
################################
#since the tenure is fluturated, I plot it

tenure_abnorm <- data.frame(time=seq(0,21),(abnorm=unlist(compare_rate['tenure'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
  geom_line(col="royalblue3")+
  geom_hline(yintercept=0,col='firebrick3')+
  geom_point(col="royalblue3",alpha=0.75)+
  labs(y='+- % than\nthis segment should be', title='tenure fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################

cat('Mean of Seg1 / Mean of total:\n')
## Mean of Seg1 / Mean of total:
mean_table <- round(colMeans(seg1)/colMeans(myData),digit=3)[-46]
print(mean_table)
##        spend_per_txn       spend_per_item                  jan 
##                1.058                1.023                0.837 
##                  feb                  mar                  apr 
##                0.852                0.880                0.805 
##                  may                  jun                  jul 
##                0.902                0.881                1.108 
##                  aug                  sep                  oct 
##                1.097                1.101                0.889 
##                  nov                  dec          total_spend 
##                0.975                0.904                0.994 
##      mens_dept_spend    womens_dept_spend      kids_dept_spend 
##                0.910                0.980                0.974 
##  athletic_dept_spend    accessories_spend              recency 
##                1.021                0.999                0.950 
##             response           total_txns          total_items 
##                0.847                0.947                0.970 
##         unique_sizes         unique_depts       internet_spend 
##                0.993                0.994                0.626 
##               tenure        retained_flag       retained_spend 
##                0.922                0.910                0.900 
##                cmpns         pct_response       ever_responded 
##                0.878                0.939                0.900 
##                opens               clicks             hhincome 
##                0.990                1.012                0.995 
##                hhage                hhwom                hhmen 
##                0.988                0.984                0.996 
##               hhkids         man_dept_buy      womens_dept_buy 
##                0.991                0.919                0.982 
##        kids_dept_buy    athletic_dept_buy accessories_dept_buy 
##                0.993                1.026                0.993
cat('filtered, +/- 10%: \n')
## filtered, +/- 10%:
filter_idx <- mean_table>1.1|mean_table<0.9
mean_table[filter_idx]-1
##            jan            feb            mar            apr            jun 
##         -0.163         -0.148         -0.120         -0.195         -0.119 
##            jul            sep            oct       response internet_spend 
##          0.108          0.101         -0.111         -0.153         -0.374 
##          cmpns 
##         -0.122
##############################
##Plot the month fluturation##
##############################
#since the purchase month is fluturated, I plot it

month_mean <- data.frame(
  month=seq(1,12),
  value=mean_table[c(seq(3,9),27,seq(10,13))],row.names = NULL)
ggplot(data=month_mean,aes(month,(value-1)*100))+
  geom_line(col="royalblue3")+
  geom_hline(yintercept=0,col='firebrick3')+
  geom_point(col="royalblue3",alpha=0.75)+
  labs(y='+- % than\n mean of total', title='Purchase happening in month')+
  scale_x_continuous(breaks=seq(1,12))

seg2

########
##seg2##
########
seg2rate <- nrow(seg2)/nrow(myData)
##Run a roop to compare rate( attribute rate / ovservation rate)##
compare_rate <- list()
need_mannual <- c()

for(i in colnames(seg2)){
  if(length(table(seg2[,i]))==length(table(myData[,i]))){
    compare_rate[i]<-list(value=(round((table(seg2[i])/table(myData[i]))/seg2rate,digit=3)))}
  else{
    need_mannual <- append(need_mannual,i)

  }
}
cat('seg2 compare to total: \n')
## seg2 compare to total:
print(compare_rate)
## $jan
## 
##     0     1 
## 1.010 0.937 
## 
## $feb
## 
##     0     1 
## 1.003 0.985 
## 
## $mar
## 
##     0     1 
## 1.008 0.959 
## 
## $apr
## 
##     0     1 
## 1.015 0.920 
## 
## $may
## 
##     0     1 
## 0.995 1.022 
## 
## $jun
## 
##     0     1 
## 1.029 0.834 
## 
## $jul
## 
##     0     1 
## 1.005 0.975 
## 
## $aug
## 
##     0     1 
## 1.000 0.999 
## 
## $sep
## 
##     0     1 
## 1.021 0.893 
## 
## $oct
## 
##     0     1 
## 0.981 1.144 
## 
## $nov
## 
##     0     1 
## 0.994 1.073 
## 
## $dec
## 
##     0     1 
## 0.978 1.195 
## 
## $recency
## 
##     0     1     2     3     4     5     6     7     8     9    10    11 
## 1.327 0.917 0.956 1.039 0.878 1.175 0.993 1.070 1.243 1.082 1.158 1.111 
##    12    13    14    15    16    17    18    19    20    21 
## 1.123 0.845 1.025 1.029 0.548 0.822 0.864 1.179 0.738 0.504 
## 
## $tenure
## 
##     0     1     2     3     4     5     6     7     8     9    10    11 
## 1.083 1.158 1.139 1.298 0.973 0.832 0.798 1.233 1.515 1.380 1.411 1.161 
##    12    13    14    15    16    17    18    19    20    21 
## 1.214 1.056 1.045 0.884 0.759 0.829 0.742 0.889 0.753 0.782 
## 
## $retained_flag
## 
##     0     1 
## 1.013 0.977 
## 
## $ever_responded
## 
##     0     1 
## 1.006 0.987 
## 
## $man_dept_buy
## 
##     0     1 
## 0.947 1.193 
## 
## $womens_dept_buy
## 
##     0     1 
## 1.011 0.990 
## 
## $kids_dept_buy
## 
##     0     1 
## 1.021 0.901 
## 
## $athletic_dept_buy
## 
##     0     1 
## 1.034 0.977 
## 
## $accessories_dept_buy
## 
##     0     1 
## 1.021 0.954
cat('filtered, +/- 10%: \n') 
## filtered, +/- 10%:
print(unlist(compare_rate)[unlist(compare_rate)>1.1|unlist(compare_rate)<0.9]-1)
##          jun.1          sep.1          oct.1          dec.1      recency.0 
##         -0.166         -0.107          0.144          0.195          0.327 
##      recency.4      recency.5      recency.8     recency.10     recency.11 
##         -0.122          0.175          0.243          0.158          0.111 
##     recency.12     recency.13     recency.16     recency.17     recency.18 
##          0.123         -0.155         -0.452         -0.178         -0.136 
##     recency.19     recency.20     recency.21       tenure.1       tenure.2 
##          0.179         -0.262         -0.496          0.158          0.139 
##       tenure.3       tenure.5       tenure.6       tenure.7       tenure.8 
##          0.298         -0.168         -0.202          0.233          0.515 
##       tenure.9      tenure.10      tenure.11      tenure.12      tenure.15 
##          0.380          0.411          0.161          0.214         -0.116 
##      tenure.16      tenure.17      tenure.18      tenure.19      tenure.20 
##         -0.241         -0.171         -0.258         -0.111         -0.247 
##      tenure.21 man_dept_buy.1 
##         -0.218          0.193
################################
##Plot the recency abnormality##
################################
#since the recency is fluturated, I plot it

recency_abnorm <- data.frame(time=seq(0,21),abnorm=(unlist(compare_rate['recency'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
  geom_line(col="royalblue3")+
  geom_hline(yintercept=0,col='firebrick3')+
  geom_point(col="royalblue3",alpha=0.75)+
  labs(y='+- % than\nthis segment should be', title='recency fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################
##Plot the tenure abnormality##
################################
#since the tenure is fluturated, I plot it

tenure_abnorm <- data.frame(time=seq(0,21),(abnorm=unlist(compare_rate['tenure'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
  geom_line(col="royalblue3")+
  geom_hline(yintercept=0,col='firebrick3')+
  geom_point(col="royalblue3",alpha=0.75)+
  labs(y='+- % than\nthis segment should be', title='tenure fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################
cat('Mean of Seg2 / Mean of total:\n')
## Mean of Seg2 / Mean of total:
mean_table <- round(colMeans(seg2)/colMeans(myData),digit=3)[-46]
print(mean_table)
##        spend_per_txn       spend_per_item                  jan 
##                1.016                1.001                0.937 
##                  feb                  mar                  apr 
##                0.985                0.959                0.920 
##                  may                  jun                  jul 
##                1.022                0.834                0.975 
##                  aug                  sep                  oct 
##                0.999                0.893                1.144 
##                  nov                  dec          total_spend 
##                1.073                1.195                0.993 
##      mens_dept_spend    womens_dept_spend      kids_dept_spend 
##                1.133                1.053                0.850 
##  athletic_dept_spend    accessories_spend              recency 
##                0.945                0.939                0.954 
##             response           total_txns          total_items 
##                0.911                0.977                0.989 
##         unique_sizes         unique_depts       internet_spend 
##                0.995                0.992                1.749 
##               tenure        retained_flag       retained_spend 
##                0.933                0.977                0.942 
##                cmpns         pct_response       ever_responded 
##                0.925                1.028                0.987 
##                opens               clicks             hhincome 
##                0.998                1.002                0.989 
##                hhage                hhwom                hhmen 
##                1.000                0.999                0.995 
##               hhkids         man_dept_buy      womens_dept_buy 
##                0.976                1.193                0.990 
##        kids_dept_buy    athletic_dept_buy accessories_dept_buy 
##                0.901                0.977                0.954
cat('filtered, +/- 10%: \n')
## filtered, +/- 10%:
filter_idx <- mean_table>1.1|mean_table<0.9
mean_table[filter_idx]-1
##             jun             sep             oct             dec 
##          -0.166          -0.107           0.144           0.195 
## mens_dept_spend kids_dept_spend  internet_spend    man_dept_buy 
##           0.133          -0.150           0.749           0.193
##############################
##Plot the month fluturation##
##############################
#since the purchase month is fluturated, I plot it

month_mean <- data.frame(
  month=seq(1,12),
  value=mean_table[c(seq(3,9),27,seq(10,13))],row.names = NULL)
ggplot(data=month_mean,aes(month,(value-1)*100))+
  geom_line(col="royalblue3")+
  geom_hline(yintercept=0,col='firebrick3')+
  geom_point(col="royalblue3",alpha=0.75)+
  labs(y='+- % than\n mean of total', title='Purchase happening in month')+
  scale_x_continuous(breaks=seq(1,12))

seg3

########
##seg3##
########
seg3rate <- nrow(seg3)/nrow(myData)
##Run a roop to compare rate( attribute rate / ovservation rate)##
compare_rate <- list()
need_mannual <- c()

for(i in colnames(seg3)){
  if(length(table(seg3[,i]))==length(table(myData[,i]))){
    compare_rate[i]<-list(value=(round((table(seg3[i])/table(myData[i]))/seg3rate,digit=3)))}
  else{
    need_mannual <- append(need_mannual,i)

  }
}
cat('seg3 compare to total: \n')
## seg3 compare to total:
print(compare_rate)
## $jan
## 
##     0     1 
## 1.045 0.705 
## 
## $feb
## 
##     0     1 
## 1.034 0.793 
## 
## $mar
## 
##     0     1 
## 1.028 0.859 
## 
## $apr
## 
##     0     1 
## 1.003 0.984 
## 
## $may
## 
##     0     1 
## 1.018 0.919 
## 
## $jun
## 
##     0     1 
## 1.008 0.956 
## 
## $jul
## 
##     0     1 
## 1.021 0.889 
## 
## $aug
## 
##     0     1 
## 1.022 0.929 
## 
## $sep
## 
##     0     1 
## 1.001 0.994 
## 
## $oct
## 
##     0     1 
## 0.986 1.111 
## 
## $nov
## 
##     0     1 
## 1.003 0.967 
## 
## $dec
## 
##     0     1 
## 1.008 0.933 
## 
## $recency
## 
##     0     1     2     3     4     5     6     7     8     9    10    11 
## 1.069 1.070 0.851 0.963 0.977 1.214 1.144 0.780 0.885 0.657 0.859 0.942 
##    12    13    14    15    16    17    18    19    20    21 
## 1.062 1.383 1.097 1.084 0.914 0.732 1.122 1.124 1.231 1.121 
## 
## $unique_depts
## 
##     1     2     3     4     5     6     7 
## 1.087 0.992 0.901 0.844 0.932 0.885 2.545 
## 
## $tenure
## 
##     0     1     2     3     4     5     6     7     8     9    10    11 
## 0.804 1.224 1.352 0.794 0.794 1.028 1.305 0.791 0.674 0.398 1.380 1.137 
##    12    13    14    15    16    17    18    19    20    21 
## 1.195 1.224 1.078 0.984 1.207 0.791 0.791 1.076 0.931 0.805 
## 
## $retained_flag
## 
##     0     1 
## 1.076 0.869 
## 
## $ever_responded
## 
##     0     1 
## 1.070 0.861 
## 
## $man_dept_buy
## 
##     0     1 
## 0.991 1.033 
## 
## $womens_dept_buy
## 
##     0     1 
## 1.081 0.926 
## 
## $kids_dept_buy
## 
##     0     1 
## 0.979 1.095 
## 
## $athletic_dept_buy
## 
##     0     1 
## 1.125 0.915 
## 
## $accessories_dept_buy
## 
##     0     1 
## 1.031 0.931
cat('filtered, +/- 10%: \n') 
## filtered, +/- 10%:
print(unlist(compare_rate)[unlist(compare_rate)>1.1|unlist(compare_rate)<0.9]-1)
##               jan.1               feb.1               mar.1 
##              -0.295              -0.207              -0.141 
##               jul.1               oct.1           recency.2 
##              -0.111               0.111              -0.149 
##           recency.5           recency.6           recency.7 
##               0.214               0.144              -0.220 
##           recency.8           recency.9          recency.10 
##              -0.115              -0.343              -0.141 
##          recency.13          recency.17          recency.18 
##               0.383              -0.268               0.122 
##          recency.19          recency.20          recency.21 
##               0.124               0.231               0.121 
##      unique_depts.4      unique_depts.6      unique_depts.7 
##              -0.156              -0.115               1.545 
##            tenure.0            tenure.1            tenure.2 
##              -0.196               0.224               0.352 
##            tenure.3            tenure.4            tenure.6 
##              -0.206              -0.206               0.305 
##            tenure.7            tenure.8            tenure.9 
##              -0.209              -0.326              -0.602 
##           tenure.10           tenure.11           tenure.12 
##               0.380               0.137               0.195 
##           tenure.13           tenure.16           tenure.17 
##               0.224               0.207              -0.209 
##           tenure.18           tenure.21     retained_flag.1 
##              -0.209              -0.195              -0.131 
##    ever_responded.1 athletic_dept_buy.0 
##              -0.139               0.125
################################
##Plot the recency abnormality##
################################
#since the recency is fluturated, I plot it

recency_abnorm <- data.frame(time=seq(0,21),abnorm=(unlist(compare_rate['recency'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
  geom_line(col="royalblue3")+
  geom_hline(yintercept=0,col='firebrick3')+
  geom_point(col="royalblue3",alpha=0.75)+
  labs(y='+- % than\nthis segment should be', title='recency fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################
##Plot the tenure abnormality##
################################
#since the tenure is fluturated, I plot it

tenure_abnorm <- data.frame(time=seq(0,21),(abnorm=unlist(compare_rate['tenure'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
  geom_line(col="royalblue3")+
  geom_hline(yintercept=0,col='firebrick3')+
  geom_point(col="royalblue3",alpha=0.75)+
  labs(y='+- % than\nthis segment should be', title='tenure fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################

cat('Mean of Seg3 / Mean of total:\n')
## Mean of Seg3 / Mean of total:
mean_table <- round(colMeans(seg3)/colMeans(myData),digit=3)[-46]
print(mean_table)
##        spend_per_txn       spend_per_item                  jan 
##                0.967                0.959                0.705 
##                  feb                  mar                  apr 
##                0.793                0.859                0.984 
##                  may                  jun                  jul 
##                0.919                0.956                0.889 
##                  aug                  sep                  oct 
##                0.929                0.994                1.111 
##                  nov                  dec          total_spend 
##                0.967                0.933                0.902 
##      mens_dept_spend    womens_dept_spend      kids_dept_spend 
##                1.158                0.883                1.087 
##  athletic_dept_spend    accessories_spend              recency 
##                0.816                1.004                1.021 
##             response           total_txns          total_items 
##                0.719                0.931                0.944 
##         unique_sizes         unique_depts       internet_spend 
##                0.952                0.961                0.718 
##               tenure        retained_flag       retained_spend 
##                0.977                0.869                0.866 
##                cmpns         pct_response       ever_responded 
##                0.883                0.804                0.861 
##                opens               clicks             hhincome 
##                0.999                1.008                0.993 
##                hhage                hhwom                hhmen 
##                0.995                0.975                0.999 
##               hhkids         man_dept_buy      womens_dept_buy 
##                1.002                1.033                0.926 
##        kids_dept_buy    athletic_dept_buy accessories_dept_buy 
##                1.095                0.915                0.931
cat('filtered, +/- 10%: \n')
## filtered, +/- 10%:
filter_idx <- mean_table>1.1|mean_table<0.9
mean_table[filter_idx]-1
##                 jan                 feb                 mar 
##              -0.295              -0.207              -0.141 
##                 jul                 oct     mens_dept_spend 
##              -0.111               0.111               0.158 
##   womens_dept_spend athletic_dept_spend            response 
##              -0.117              -0.184              -0.281 
##      internet_spend       retained_flag      retained_spend 
##              -0.282              -0.131              -0.134 
##               cmpns        pct_response      ever_responded 
##              -0.117              -0.196              -0.139
##############################
##Plot the month fluturation##
##############################
#since the purchase month is fluturated, I plot it

month_mean <- data.frame(
  month=seq(1,12),
  value=mean_table[c(seq(3,9),27,seq(10,13))],row.names = NULL)
ggplot(data=month_mean,aes(month,(value-1)*100))+
  geom_line(col="royalblue3")+
  geom_hline(yintercept=0,col='firebrick3')+
  geom_point(col="royalblue3",alpha=0.75)+
  labs(y='+- % than\n mean of total', title='Purchase happening in month')+
  scale_x_continuous(breaks=seq(1,12))

seg4

########
##seg4##
########
seg4rate <- nrow(seg4)/nrow(myData)
##Run a roop to compare rate( attribute rate / ovservation rate)##
compare_rate <- list()
need_mannual <- c()

for(i in colnames(seg4)){
  if(length(table(seg4[,i]))==length(table(myData[,i]))){
    compare_rate[i]<-list(value=(round((table(seg4[i])/table(myData[i]))/seg4rate,digit=3)))}
  else{
    need_mannual <- append(need_mannual,i)

  }
}
cat('seg4 compare to total: \n')
## seg4 compare to total:
print(compare_rate)
## $jan
## 
##     0     1 
## 1.026 0.830 
## 
## $feb
## 
##     0     1 
## 1.048 0.709 
## 
## $mar
## 
##     0     1 
## 1.051 0.748 
## 
## $apr
## 
##     0     1 
## 0.999 1.007 
## 
## $may
## 
##     0     1 
## 0.974 1.119 
## 
## $jun
## 
##     0     1 
## 0.932 1.390 
## 
## $jul
## 
##     0     1 
## 0.980 1.106 
## 
## $aug
## 
##     0     1 
## 1.048 0.846 
## 
## $sep
## 
##     0     1 
## 1.052 0.730 
## 
## $oct
## 
##     0     1 
## 1.036 0.723 
## 
## $nov
## 
##     0     1 
## 1.023 0.699 
## 
## $dec
## 
##     0     1 
## 1.023 0.801 
## 
## $recency
## 
##     0     1     2     3     4     5     6     7     8     9    10    11 
## 0.952 0.514 0.681 1.011 1.683 1.317 1.165 0.687 0.762 0.915 0.718 1.042 
##    12    13    14    15    16    17    18    19    20    21 
## 0.676 0.817 1.166 1.245 1.799 1.739 1.262 0.983 1.050 0.827 
## 
## $tenure
## 
##     0     1     2     3     4     5     6     7     8     9    10    11 
## 0.659 0.317 0.457 0.852 2.202 1.821 1.541 0.778 0.597 0.560 0.702 0.763 
##    12    13    14    15    16    17    18    19    20    21 
## 0.818 0.867 1.228 1.340 1.504 1.232 1.049 0.744 0.855 0.856 
## 
## $retained_flag
## 
##     0     1 
## 0.998 1.004 
## 
## $ever_responded
## 
##     0     1 
## 1.059 0.883 
## 
## $man_dept_buy
## 
##     0     1 
## 1.003 0.989 
## 
## $womens_dept_buy
## 
##     0     1 
## 1.060 0.946 
## 
## $kids_dept_buy
## 
##     0     1 
## 1.000 0.998 
## 
## $athletic_dept_buy
## 
##     0     1 
## 0.972 1.019 
## 
## $accessories_dept_buy
## 
##    0    1 
## 1.10 0.78
cat('filtered, +/- 10%: \n') 
## filtered, +/- 10%:
print(unlist(compare_rate)[unlist(compare_rate)>1.1|unlist(compare_rate)<0.9]-1)
##                  jan.1                  feb.1                  mar.1 
##                 -0.170                 -0.291                 -0.252 
##                  may.1                  jun.1                  jul.1 
##                  0.119                  0.390                  0.106 
##                  aug.1                  sep.1                  oct.1 
##                 -0.154                 -0.270                 -0.277 
##                  nov.1                  dec.1              recency.1 
##                 -0.301                 -0.199                 -0.486 
##              recency.2              recency.4              recency.5 
##                 -0.319                  0.683                  0.317 
##              recency.6              recency.7              recency.8 
##                  0.165                 -0.313                 -0.238 
##             recency.10             recency.12             recency.13 
##                 -0.282                 -0.324                 -0.183 
##             recency.14             recency.15             recency.16 
##                  0.166                  0.245                  0.799 
##             recency.17             recency.18             recency.21 
##                  0.739                  0.262                 -0.173 
##               tenure.0               tenure.1               tenure.2 
##                 -0.341                 -0.683                 -0.543 
##               tenure.3               tenure.4               tenure.5 
##                 -0.148                  1.202                  0.821 
##               tenure.6               tenure.7               tenure.8 
##                  0.541                 -0.222                 -0.403 
##               tenure.9              tenure.10              tenure.11 
##                 -0.440                 -0.298                 -0.237 
##              tenure.12              tenure.13              tenure.14 
##                 -0.182                 -0.133                  0.228 
##              tenure.15              tenure.16              tenure.17 
##                  0.340                  0.504                  0.232 
##              tenure.19              tenure.20              tenure.21 
##                 -0.256                 -0.145                 -0.144 
##       ever_responded.1 accessories_dept_buy.1 
##                 -0.117                 -0.220
################################
##Plot the recency abnormality##
################################
#since the recency is fluturated, I plot it

recency_abnorm <- data.frame(time=seq(0,21),abnorm=(unlist(compare_rate['recency'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
  geom_line(col="royalblue3")+
  geom_hline(yintercept=0,col='firebrick3')+
  geom_point(col="royalblue3",alpha=0.75)+
  labs(y='+- % than\nthis segment should be', title='recency fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################
##Plot the tenure abnormality##
################################
#since the tenure is fluturated, I plot it

tenure_abnorm <- data.frame(time=seq(0,21),(abnorm=unlist(compare_rate['tenure'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
  geom_line(col="royalblue3")+
  geom_hline(yintercept=0,col='firebrick3')+
  geom_point(col="royalblue3",alpha=0.75)+
  labs(y='+- % than\nthis segment should be', title='tenure fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################

cat('Mean of Seg4 / Mean of total:\n')
## Mean of Seg4 / Mean of total:
mean_table <- round(colMeans(seg4)/colMeans(myData),digit=3)[-46]
print(mean_table)
##        spend_per_txn       spend_per_item                  jan 
##                0.974                0.995                0.830 
##                  feb                  mar                  apr 
##                0.709                0.748                1.007 
##                  may                  jun                  jul 
##                1.119                1.390                1.106 
##                  aug                  sep                  oct 
##                0.846                0.730                0.723 
##                  nov                  dec          total_spend 
##                0.699                0.801                0.903 
##      mens_dept_spend    womens_dept_spend      kids_dept_spend 
##                0.969                0.836                0.905 
##  athletic_dept_spend    accessories_spend              recency 
##                0.939                0.732                1.093 
##             response           total_txns          total_items 
##                0.815                0.911                0.919 
##         unique_sizes         unique_depts       internet_spend 
##                0.951                0.956                1.709 
##               tenure        retained_flag       retained_spend 
##                1.011                1.004                1.009 
##                cmpns         pct_response       ever_responded 
##                0.956                0.853                0.883 
##                opens               clicks             hhincome 
##                1.020                1.021                0.963 
##                hhage                hhwom                hhmen 
##                0.975                0.978                0.964 
##               hhkids         man_dept_buy      womens_dept_buy 
##                0.926                0.989                0.946 
##        kids_dept_buy    athletic_dept_buy accessories_dept_buy 
##                0.998                1.019                0.780
cat('filtered, +/- 10%: \n')
## filtered, +/- 10%:
filter_idx <- mean_table>1.1|mean_table<0.9
mean_table[filter_idx]-1
##                  jan                  feb                  mar 
##               -0.170               -0.291               -0.252 
##                  may                  jun                  jul 
##                0.119                0.390                0.106 
##                  aug                  sep                  oct 
##               -0.154               -0.270               -0.277 
##                  nov                  dec    womens_dept_spend 
##               -0.301               -0.199               -0.164 
##    accessories_spend             response       internet_spend 
##               -0.268               -0.185                0.709 
##         pct_response       ever_responded accessories_dept_buy 
##               -0.147               -0.117               -0.220
##############################
##Plot the month fluturation##
##############################
#since the purchase month is fluturated, I plot it

month_mean <- data.frame(
  month=seq(1,12),
  value=mean_table[c(seq(3,9),27,seq(10,13))],row.names = NULL)
ggplot(data=month_mean,aes(month,(value-1)*100))+
  geom_line(col="royalblue3")+
  geom_hline(yintercept=0,col='firebrick3')+
  geom_point(col="royalblue3",alpha=0.75)+
  labs(y='+- % than\n mean of total', title='Purchase happening in month')+
  scale_x_continuous(breaks=seq(1,12))

seg5

########
##seg5##
########
seg5rate <- nrow(seg5)/nrow(myData)
##Run a roop to compare rate( attribute rate / ovservation rate)##
compare_rate <- list()
need_mannual <- c()

for(i in colnames(seg5)){
  if(length(table(seg5[,i]))==length(table(myData[,i]))){
    compare_rate[i]<-list(value=(round((table(seg5[i])/table(myData[i]))/seg5rate,digit=3)))}
  else{
    need_mannual <- append(need_mannual,i)

  }
}
cat('seg5 compare to total: \n')
## seg5 compare to total:
print(compare_rate)
## $jan
## 
##     0     1 
## 0.994 1.042 
## 
## $feb
## 
##     0     1 
## 1.008 0.951 
## 
## $mar
## 
##     0     1 
## 0.999 1.003 
## 
## $apr
## 
##     0     1 
## 0.995 1.025 
## 
## $may
## 
##     0     1 
## 1.011 0.949 
## 
## $jun
## 
##     0     1 
## 1.020 0.885 
## 
## $jul
## 
##     0     1 
## 1.013 0.931 
## 
## $aug
## 
##     0     1 
## 1.025 0.919 
## 
## $sep
## 
##     0     1 
## 1.022 0.886 
## 
## $oct
## 
##     0     1 
## 1.000 0.999 
## 
## $nov
## 
##     0     1 
## 0.995 1.065 
## 
## $dec
## 
##     0     1 
## 1.009 0.921 
## 
## $recency
## 
##     0     1     2     3     4     5     6     7     8     9    10    11 
## 1.243 0.902 1.003 0.915 1.011 0.976 1.269 1.203 0.923 1.047 0.979 1.054 
##    12    13    14    15    16    17    18    19    20    21 
## 1.042 0.766 0.911 0.907 1.092 0.769 0.914 1.042 0.735 1.330 
## 
## $unique_depts
## 
##     1     2     3     4     5     6     7 
## 1.035 1.075 0.960 0.738 0.759 0.540 2.072 
## 
## $tenure
## 
##     0     1     2     3     4     5     6     7     8     9    10    11 
## 1.745 1.128 1.083 0.911 1.117 1.004 1.169 1.224 0.960 1.181 1.065 1.136 
##    12    13    14    15    16    17    18    19    20    21 
## 0.909 0.876 0.773 0.924 0.835 1.046 1.008 0.900 0.846 1.036 
## 
## $retained_flag
## 
##     0     1 
## 1.011 0.981 
## 
## $ever_responded
## 
##     0     1 
## 1.060 0.882 
## 
## $opens
## 
##  0.04 0.041 0.042 0.043 0.044 0.045 0.046 0.047 0.048 0.049  0.05 0.051 
## 0.921 0.444 0.622 1.015 0.761 0.761 0.625 0.712 0.857 0.869 1.013 1.228 
## 0.052 0.053 0.054 0.055 0.056 0.057 0.058 0.059  0.06 0.061 0.062 0.063 
## 1.434 1.076 1.036 1.058 0.921 1.059 1.064 1.322 0.849 0.885 1.007 0.921 
## 0.064 0.065 0.066 0.067 0.068 0.069  0.07 0.071 0.072 0.073 0.074 0.075 
## 0.853 0.938 1.188 0.998 1.120 1.120 0.862 0.893 1.021 1.059 1.071 1.395 
## 0.076 0.077 0.078 0.079  0.08 0.081 0.082 0.083 0.084 0.085 0.086 0.087 
## 0.518 0.345 0.863 1.776 0.863 0.812 1.130 0.921 0.637 1.657 1.209 0.518 
## 0.088 0.089  0.09 0.091 0.092 0.093 0.094 0.095 0.096 0.097 0.098 0.099 
## 1.036 1.105 0.829 1.776 0.637 0.777 1.462 1.450 0.296 1.130 0.296 1.184 
##   0.1 0.101 0.102 0.103 0.104 0.105 
## 0.829 0.592 1.706 0.888 0.956 1.657 
## 
## $man_dept_buy
## 
##     0     1 
## 1.014 0.949 
## 
## $womens_dept_buy
## 
##     0     1 
## 1.014 0.988 
## 
## $kids_dept_buy
## 
##     0     1 
## 1.023 0.892 
## 
## $athletic_dept_buy
## 
##     0     1 
## 1.062 0.958 
## 
## $accessories_dept_buy
## 
##     0     1 
## 1.009 0.979
cat('filtered, +/- 10%: \n') 
## filtered, +/- 10%:
print(unlist(compare_rate)[unlist(compare_rate)>1.1|unlist(compare_rate)<0.9]-1)
##            jun.1            sep.1        recency.0        recency.6 
##           -0.115           -0.114            0.243            0.269 
##        recency.7       recency.13       recency.17       recency.20 
##            0.203           -0.234           -0.231           -0.265 
##       recency.21   unique_depts.4   unique_depts.5   unique_depts.6 
##            0.330           -0.262           -0.241           -0.460 
##   unique_depts.7         tenure.0         tenure.1         tenure.4 
##            1.072            0.745            0.128            0.117 
##         tenure.6         tenure.7         tenure.9        tenure.11 
##            0.169            0.224            0.181            0.136 
##        tenure.13        tenure.14        tenure.16        tenure.20 
##           -0.124           -0.227           -0.165           -0.154 
## ever_responded.1      opens.0.041      opens.0.042      opens.0.044 
##           -0.118           -0.556           -0.378           -0.239 
##      opens.0.045      opens.0.046      opens.0.047      opens.0.048 
##           -0.239           -0.375           -0.288           -0.143 
##      opens.0.049      opens.0.051      opens.0.052      opens.0.059 
##           -0.131            0.228            0.434            0.322 
##       opens.0.06      opens.0.061      opens.0.064      opens.0.066 
##           -0.151           -0.115           -0.147            0.188 
##      opens.0.068      opens.0.069       opens.0.07      opens.0.071 
##            0.120            0.120           -0.138           -0.107 
##      opens.0.075      opens.0.076      opens.0.077      opens.0.078 
##            0.395           -0.482           -0.655           -0.137 
##      opens.0.079       opens.0.08      opens.0.081      opens.0.082 
##            0.776           -0.137           -0.188            0.130 
##      opens.0.084      opens.0.085      opens.0.086      opens.0.087 
##           -0.363            0.657            0.209           -0.482 
##      opens.0.089       opens.0.09      opens.0.091      opens.0.092 
##            0.105           -0.171            0.776           -0.363 
##      opens.0.093      opens.0.094      opens.0.095      opens.0.096 
##           -0.223            0.462            0.450           -0.704 
##      opens.0.097      opens.0.098      opens.0.099        opens.0.1 
##            0.130           -0.704            0.184           -0.171 
##      opens.0.101      opens.0.102      opens.0.103      opens.0.105 
##           -0.408            0.706           -0.112            0.657 
##  kids_dept_buy.1 
##           -0.108
################################
##Plot the recency abnormality##
################################
#since the recency is fluturated, I plot it

recency_abnorm <- data.frame(time=seq(0,21),abnorm=(unlist(compare_rate['recency'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
  geom_line(col="royalblue3")+
  geom_hline(yintercept=0,col='firebrick3')+
  geom_point(col="royalblue3",alpha=0.75)+
  labs(y='+- % than\nthis segment should be', title='recency fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################
##Plot the tenure abnormality##
################################
#since the tenure is fluturated, I plot it

tenure_abnorm <- data.frame(time=seq(0,21),(abnorm=unlist(compare_rate['tenure'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
  geom_line(col="royalblue3")+
  geom_hline(yintercept=0,col='firebrick3')+
  geom_point(col="royalblue3",alpha=0.75)+
  labs(y='+- % than\nthis segment should be', title='tenure fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################

cat('Mean of Seg5 / Mean of total:\n')
## Mean of Seg5 / Mean of total:
mean_table <- round(colMeans(seg5)/colMeans(myData),digit=3)[-46]
print(mean_table)
##        spend_per_txn       spend_per_item                  jan 
##                0.987                1.016                1.042 
##                  feb                  mar                  apr 
##                0.951                1.003                1.025 
##                  may                  jun                  jul 
##                0.949                0.885                0.931 
##                  aug                  sep                  oct 
##                0.919                0.886                0.999 
##                  nov                  dec          total_spend 
##                1.065                0.921                0.918 
##      mens_dept_spend    womens_dept_spend      kids_dept_spend 
##                0.934                0.934                0.888 
##  athletic_dept_spend    accessories_spend              recency 
##                0.910                0.906                0.982 
##             response           total_txns          total_items 
##                0.825                0.931                0.897 
##         unique_sizes         unique_depts       internet_spend 
##                0.947                0.957                0.955 
##               tenure        retained_flag       retained_spend 
##                0.958                0.981                0.949 
##                cmpns         pct_response       ever_responded 
##                0.918                0.884                0.882 
##                opens               clicks             hhincome 
##                1.009                1.013                0.981 
##                hhage                hhwom                hhmen 
##                0.978                0.979                0.978 
##               hhkids         man_dept_buy      womens_dept_buy 
##                0.954                0.949                0.988 
##        kids_dept_buy    athletic_dept_buy accessories_dept_buy 
##                0.892                0.958                0.979
cat('filtered, +/- 10%: \n')
## filtered, +/- 10%:
filter_idx <- mean_table>1.1|mean_table<0.9
mean_table[filter_idx]-1
##             jun             sep kids_dept_spend        response 
##          -0.115          -0.114          -0.112          -0.175 
##     total_items    pct_response  ever_responded   kids_dept_buy 
##          -0.103          -0.116          -0.118          -0.108
##############################
##Plot the month fluturation##
##############################
#since the purchase month is fluturated, I plot it

month_mean <- data.frame(
  month=seq(1,12),
  value=mean_table[c(seq(3,9),27,seq(10,13))],row.names = NULL)
ggplot(data=month_mean,aes(month,(value-1)*100))+
  geom_line(col="royalblue3")+
  geom_hline(yintercept=0,col='firebrick3')+
  geom_point(col="royalblue3",alpha=0.75)+
  labs(y='+- % than\n mean of total', title='Purchase happening in month')+
  scale_x_continuous(breaks=seq(1,12))

seg6

########
##seg6##
########
seg6rate <- nrow(seg6)/nrow(myData)
##Run a roop to compare rate( attribute rate / ovservation rate)##
compare_rate <- list()
need_mannual <- c()

for(i in colnames(seg6)){
  if(length(table(seg6[,i]))==length(table(myData[,i]))){
    compare_rate[i]<-list(value=(round((table(seg6[i])/table(myData[i]))/seg6rate,digit=3)))}
  else{
    need_mannual <- append(need_mannual,i)

  }
}
cat('seg6 compare to total: \n')
## seg6 compare to total:
print(compare_rate)
## $jan
## 
##     0     1 
## 0.959 1.269 
## 
## $feb
## 
##     0     1 
## 0.968 1.193 
## 
## $mar
## 
##     0     1 
## 0.933 1.334 
## 
## $apr
## 
##     0     1 
## 0.928 1.387 
## 
## $may
## 
##     0     1 
## 0.948 1.241 
## 
## $jun
## 
##     0     1 
## 0.960 1.231 
## 
## $jul
## 
##     0     1 
## 0.976 1.131 
## 
## $aug
## 
##     0     1 
## 0.931 1.224 
## 
## $sep
## 
##     0     1 
## 0.932 1.350 
## 
## $oct
## 
##     0     1 
## 0.966 1.263 
## 
## $nov
## 
##     0     1 
## 0.981 1.242 
## 
## $dec
## 
##     0     1 
## 0.952 1.414 
## 
## $recency
## 
##     0     1     2     3     4     5     6     7     8     9    10    11 
## 1.131 1.354 1.060 0.886 0.811 0.763 0.725 1.006 0.763 1.006 1.283 0.992 
##    12    13    14    15    16    17    18    19    20    21 
## 0.957 1.144 1.008 0.877 1.027 0.841 1.228 1.053 1.006 0.859 
## 
## $total_txns
## 
##     1     2     3     4     5     6     7     8     9    10    11    12 
## 0.744 0.917 1.263 1.160 1.904 1.773 1.902 1.779 2.311 2.753 1.418 1.950 
##    13    14    15 
## 3.900 5.199 3.900 
## 
## $unique_depts
## 
##     1     2     3     4     5     6     7 
## 0.781 0.924 1.327 1.408 1.538 1.356 1.950 
## 
## $tenure
## 
##     0     1     2     3     4     5     6     7     8     9    10    11 
## 0.513 0.889 0.647 0.830 0.387 0.473 0.560 0.566 0.465 0.871 0.620 0.831 
##    12    13    14    15    16    17    18    19    20    21 
## 0.756 0.862 1.195 0.928 1.110 1.161 1.792 1.515 1.403 1.456 
## 
## $retained_flag
## 
##     0     1 
## 0.836 1.284 
## 
## $cmpns
## 
##     0     1     2     3     4     5     6     7     8     9    10    11 
## 0.580 0.544 0.765 0.998 0.820 0.859 1.020 0.936 1.287 1.300 1.108 1.444 
##    12    13    14    15    16    17    18    19    20    21    22    23 
## 1.766 2.112 2.885 1.800 2.304 3.200 2.925 2.482 4.129 1.300 2.600 5.199 
##    24 
## 7.799 
## 
## $ever_responded
## 
##     0     1 
## 0.735 1.524 
## 
## $man_dept_buy
## 
##     0     1 
## 0.977 1.082 
## 
## $womens_dept_buy
## 
##     0     1 
## 0.878 1.111 
## 
## $kids_dept_buy
## 
##     0     1 
## 0.960 1.184 
## 
## $athletic_dept_buy
## 
##     0     1 
## 0.905 1.065 
## 
## $accessories_dept_buy
## 
##     0     1 
## 0.879 1.266
cat('filtered, +/- 10%: \n') 
## filtered, +/- 10%:
print(unlist(compare_rate)[unlist(compare_rate)>1.1|unlist(compare_rate)<0.9]-1)
##                  jan.1                  feb.1                  mar.1 
##                  0.269                  0.193                  0.334 
##                  apr.1                  may.1                  jun.1 
##                  0.387                  0.241                  0.231 
##                  jul.1                  aug.1                  sep.1 
##                  0.131                  0.224                  0.350 
##                  oct.1                  nov.1                  dec.1 
##                  0.263                  0.242                  0.414 
##              recency.0              recency.1              recency.3 
##                  0.131                  0.354                 -0.114 
##              recency.4              recency.5              recency.6 
##                 -0.189                 -0.237                 -0.275 
##              recency.8             recency.10             recency.13 
##                 -0.237                  0.283                  0.144 
##             recency.15             recency.17             recency.18 
##                 -0.123                 -0.159                  0.228 
##             recency.21           total_txns.1           total_txns.3 
##                 -0.141                 -0.256                  0.263 
##           total_txns.4           total_txns.5           total_txns.6 
##                  0.160                  0.904                  0.773 
##           total_txns.7           total_txns.8           total_txns.9 
##                  0.902                  0.779                  1.311 
##          total_txns.10          total_txns.11          total_txns.12 
##                  1.753                  0.418                  0.950 
##          total_txns.13          total_txns.14          total_txns.15 
##                  2.900                  4.199                  2.900 
##         unique_depts.1         unique_depts.3         unique_depts.4 
##                 -0.219                  0.327                  0.408 
##         unique_depts.5         unique_depts.6         unique_depts.7 
##                  0.538                  0.356                  0.950 
##               tenure.0               tenure.1               tenure.2 
##                 -0.487                 -0.111                 -0.353 
##               tenure.3               tenure.4               tenure.5 
##                 -0.170                 -0.613                 -0.527 
##               tenure.6               tenure.7               tenure.8 
##                 -0.440                 -0.434                 -0.535 
##               tenure.9              tenure.10              tenure.11 
##                 -0.129                 -0.380                 -0.169 
##              tenure.12              tenure.13              tenure.14 
##                 -0.244                 -0.138                  0.195 
##              tenure.16              tenure.17              tenure.18 
##                  0.110                  0.161                  0.792 
##              tenure.19              tenure.20              tenure.21 
##                  0.515                  0.403                  0.456 
##        retained_flag.0        retained_flag.1                cmpns.0 
##                 -0.164                  0.284                 -0.420 
##                cmpns.1                cmpns.2                cmpns.4 
##                 -0.456                 -0.235                 -0.180 
##                cmpns.5                cmpns.8                cmpns.9 
##                 -0.141                  0.287                  0.300 
##               cmpns.10               cmpns.11               cmpns.12 
##                  0.108                  0.444                  0.766 
##               cmpns.13               cmpns.14               cmpns.15 
##                  1.112                  1.885                  0.800 
##               cmpns.16               cmpns.17               cmpns.18 
##                  1.304                  2.200                  1.925 
##               cmpns.19               cmpns.20               cmpns.21 
##                  1.482                  3.129                  0.300 
##               cmpns.22               cmpns.23               cmpns.24 
##                  1.600                  4.199                  6.799 
##       ever_responded.0       ever_responded.1      womens_dept_buy.0 
##                 -0.265                  0.524                 -0.122 
##      womens_dept_buy.1        kids_dept_buy.1 accessories_dept_buy.0 
##                  0.111                  0.184                 -0.121 
## accessories_dept_buy.1 
##                  0.266
################################
##Plot the recency abnormality##
################################
#since the recency is fluturated, I plot it

recency_abnorm <- data.frame(time=seq(0,21),abnorm=(unlist(compare_rate['recency'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
  geom_line(col="royalblue3")+
  geom_hline(yintercept=0,col='firebrick3')+
  geom_point(col="royalblue3",alpha=0.75)+
  labs(y='+- % than\nthis segment should be', title='recency fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################
##Plot the tenure abnormality##
################################
#since the tenure is fluturated, I plot it

tenure_abnorm <- data.frame(time=seq(0,21),(abnorm=unlist(compare_rate['tenure'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
  geom_line(col="royalblue3")+
  geom_hline(yintercept=0,col='firebrick3')+
  geom_point(col="royalblue3",alpha=0.75)+
  labs(y='+- % than\nthis segment should be', title='tenure fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################

cat('Mean of Seg6 / Mean of total:\n')
## Mean of Seg6 / Mean of total:
mean_table<- round(colMeans(myData)/colMeans(seg6),digit=3)[-46]
print(mean_table)
##        spend_per_txn       spend_per_item                  jan 
##                1.031                1.052                0.788 
##                  feb                  mar                  apr 
##                0.838                0.749                0.721 
##                  may                  jun                  jul 
##                0.806                0.812                0.884 
##                  aug                  sep                  oct 
##                0.817                0.741                0.792 
##                  nov                  dec          total_spend 
##                0.805                0.707                0.784 
##      mens_dept_spend    womens_dept_spend      kids_dept_spend 
##                0.892                0.764                0.800 
##  athletic_dept_spend    accessories_spend              recency 
##                0.776                0.714                1.014 
##             response           total_txns          total_items 
##                0.516                0.766                0.757 
##         unique_sizes         unique_depts       internet_spend 
##                0.865                0.884                0.752 
##               tenure        retained_flag       retained_spend 
##                0.858                0.779                0.734 
##                cmpns         pct_response       ever_responded 
##                0.680                0.664                0.656 
##                opens               clicks             hhincome 
##                1.014                1.060                0.926 
##                hhage                hhwom                hhmen 
##                0.928                0.921                0.931 
##               hhkids         man_dept_buy      womens_dept_buy 
##                0.853                0.924                0.900 
##        kids_dept_buy    athletic_dept_buy accessories_dept_buy 
##                0.845                0.939                0.790
cat('filtered, +/- 10%: \n')
## filtered, +/- 10%:
filter_idx <- mean_table>1.1|mean_table<0.9
mean_table[filter_idx]-1
##                  jan                  feb                  mar 
##               -0.212               -0.162               -0.251 
##                  apr                  may                  jun 
##               -0.279               -0.194               -0.188 
##                  jul                  aug                  sep 
##               -0.116               -0.183               -0.259 
##                  oct                  nov                  dec 
##               -0.208               -0.195               -0.293 
##          total_spend      mens_dept_spend    womens_dept_spend 
##               -0.216               -0.108               -0.236 
##      kids_dept_spend  athletic_dept_spend    accessories_spend 
##               -0.200               -0.224               -0.286 
##             response           total_txns          total_items 
##               -0.484               -0.234               -0.243 
##         unique_sizes         unique_depts       internet_spend 
##               -0.135               -0.116               -0.248 
##               tenure        retained_flag       retained_spend 
##               -0.142               -0.221               -0.266 
##                cmpns         pct_response       ever_responded 
##               -0.320               -0.336               -0.344 
##               hhkids        kids_dept_buy accessories_dept_buy 
##               -0.147               -0.155               -0.210
##############################
##Plot the month fluturation##
##############################
#since the purchase month is fluturated, I plot it

month_mean <- data.frame(
  month=seq(1,12),
  value=mean_table[c(seq(3,9),27,seq(10,13))],row.names = NULL)
ggplot(data=month_mean,aes(month,(value-1)*100))+
  geom_line(col="royalblue3")+
  geom_hline(yintercept=0,col='firebrick3')+
  geom_point(col="royalblue3",alpha=0.75)+
  labs(y='+- % than\n mean of total', title='Purchase happening in month')+
  scale_x_continuous(breaks=seq(1,12))

seg7

########
##seg7##
########
seg7rate <- nrow(seg7)/nrow(myData)
##Run a roop to compare rate( attribute rate / ovservation rate)##
compare_rate <- list()
need_mannual <- c()

for(i in colnames(seg7)){
  if(length(table(seg7[,i]))==length(table(myData[,i]))){
    compare_rate[i]<-list(value=(round((table(seg7[i])/table(myData[i]))/seg7rate,digit=3)))}
  else{
    need_mannual <- append(need_mannual,i)

  }
}
cat('seg7 compare to total: \n')
## seg7 compare to total:
print(compare_rate)
## $jan
## 
##     0     1 
## 0.936 1.416 
## 
## $feb
## 
##     0     1 
## 0.889 1.668 
## 
## $mar
## 
##     0     1 
## 0.950 1.247 
## 
## $apr
## 
##     0     1 
## 1.006 0.967 
## 
## $may
## 
##     0     1 
## 1.008 0.965 
## 
## $jun
## 
##     0     1 
## 0.987 1.073 
## 
## $jul
## 
##     0     1 
## 1.036 0.805 
## 
## $aug
## 
##     0     1 
## 1.023 0.924 
## 
## $sep
## 
##     0     1 
## 1.000 0.998 
## 
## $oct
## 
##    0    1 
## 1.01 0.92 
## 
## $nov
## 
##     0     1 
## 1.012 0.849 
## 
## $dec
## 
##     0     1 
## 1.009 0.926 
## 
## $recency
## 
##     0     1     2     3     4     5     6     7     8     9    10    11 
## 0.237 0.906 0.960 0.809 1.140 0.813 0.884 1.072 1.807 1.430 0.922 0.879 
##    12    13    14    15    16    17    18    19    20    21 
## 0.874 0.929 0.778 0.674 0.853 1.251 1.272 1.340 1.839 1.482 
## 
## $retained_flag
## 
##     0     1 
## 0.985 1.025 
## 
## $ever_responded
## 
##     0     1 
## 0.953 1.093 
## 
## $man_dept_buy
## 
##     0     1 
## 1.010 0.964 
## 
## $womens_dept_buy
## 
##     0     1 
## 0.933 1.061 
## 
## $kids_dept_buy
## 
##     0     1 
## 0.988 1.054 
## 
## $athletic_dept_buy
## 
##     0     1 
## 0.927 1.050 
## 
## $accessories_dept_buy
## 
##     0     1 
## 0.974 1.058
cat('filtered, +/- 10%: \n') 
## filtered, +/- 10%:
print(unlist(compare_rate)[unlist(compare_rate)>1.1|unlist(compare_rate)<0.9]-1)
##      jan.1      feb.0      feb.1      mar.1      jul.1      nov.1 
##      0.416     -0.111      0.668      0.247     -0.195     -0.151 
##  recency.0  recency.3  recency.4  recency.5  recency.6  recency.8 
##     -0.763     -0.191      0.140     -0.187     -0.116      0.807 
##  recency.9 recency.11 recency.12 recency.14 recency.15 recency.16 
##      0.430     -0.121     -0.126     -0.222     -0.326     -0.147 
## recency.17 recency.18 recency.19 recency.20 recency.21 
##      0.251      0.272      0.340      0.839      0.482
################################
##Plot the recency abnormality##
################################
#since the recency is fluturated, I plot it

recency_abnorm <- data.frame(time=seq(0,21),abnorm=(unlist(compare_rate['recency'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
  geom_line(col="royalblue3")+
  geom_hline(yintercept=0,col='firebrick3')+
  geom_point(col="royalblue3",alpha=0.75)+
  labs(y='+- % than\nthis segment should be', title='recency fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################

cat('Mean of Seg7 / Mean of total:\n')
## Mean of Seg7 / Mean of total:
mean_table <- round(colMeans(seg7)/colMeans(myData),digit=3)[-46]
print(mean_table)
##        spend_per_txn       spend_per_item                  jan 
##                0.985                1.019                1.416 
##                  feb                  mar                  apr 
##                1.668                1.247                0.967 
##                  may                  jun                  jul 
##                0.965                1.073                0.805 
##                  aug                  sep                  oct 
##                0.924                0.998                0.920 
##                  nov                  dec          total_spend 
##                0.849                0.926                1.058 
##      mens_dept_spend    womens_dept_spend      kids_dept_spend 
##                0.933                1.026                1.171 
##  athletic_dept_spend    accessories_spend              recency 
##                1.096                1.044                1.101 
##             response           total_txns          total_items 
##                1.105                1.070                1.048 
##         unique_sizes         unique_depts       internet_spend 
##                1.044                1.037                0.288 
##               tenure        retained_flag       retained_spend 
##                1.136                1.025                1.060 
##                cmpns         pct_response       ever_responded 
##                1.098                1.069                1.093 
##                opens               clicks             hhincome 
##                1.002                0.983                1.010 
##                hhage                hhwom                hhmen 
##                1.009                1.021                1.009 
##               hhkids         man_dept_buy      womens_dept_buy 
##                1.006                0.964                1.061 
##        kids_dept_buy    athletic_dept_buy accessories_dept_buy 
##                1.054                1.050                1.058
cat('filtered, +/- 10%: \n')
## filtered, +/- 10%:
filter_idx <- mean_table>1.1|mean_table<0.9
mean_table[filter_idx]-1
##             jan             feb             mar             jul 
##           0.416           0.668           0.247          -0.195 
##             nov kids_dept_spend         recency        response 
##          -0.151           0.171           0.101           0.105 
##  internet_spend          tenure 
##          -0.712           0.136
##############################
##Plot the month fluturation##
##############################
#since the purchase month is fluturated, I plot it

month_mean <- data.frame(
  month=seq(1,12),
  value=mean_table[c(seq(3,9),27,seq(10,13))],row.names = NULL)
ggplot(data=month_mean,aes(month,(value-1)*100))+
  geom_line(col="royalblue3")+
  geom_hline(yintercept=0,col='firebrick3')+
  geom_point(col="royalblue3",alpha=0.75)+
  labs(y='+- % than\n mean of total', title='Purchase happening in month')+
  scale_x_continuous(breaks=seq(1,12))

Summary of mean, median

############################
##create mean,median table##
############################

len <- seq(1,ncol(myData)-1)
summary_mean <- 
  data.frame(seg1=len,seg2=len,seg3=len,seg4=len,seg5=len,seg6=len,seg7=len)

summary_median <- 
  data.frame(seg1=len,seg2=len,seg3=len,seg4=len,seg5=len,seg6=len,seg7=len)
####
for (i in seq(1,7)){
median <- summary(myData[myData$segment==i,])[seq(3,(ncol(myData)-1)*6-3,6)]
median <- strsplit(median,':')
median <- strsplit(unlist(median)," ")
median <- stri_remove_empty(unlist(median))[seq(2,90,2)]

mean <- summary(myData[myData$segment==i,])[seq(4,(ncol(myData)-1)*6-2,6)]
mean <- strsplit(mean,":")
mean <- strsplit(unlist(mean)," ")
mean <- stri_remove_empty(unlist(mean))[seq(2,nrow(myData)*2,2)]

####

summary_mean[,i] <- data.frame(temp=round(as.numeric(mean),3))
summary_median[,i] <- data.frame(temp=round(as.numeric(median),3))
}

summary_mean <- data.frame(t(summary_mean))
colnames(summary_mean) <- colnames(myData)[len]
summary_median <- data.frame(t(summary_median))
colnames(summary_median) <- colnames(myData)[len]

#####################
##create rank table##
#####################

rank_mean <- data.frame(segment=paste('seg',seq(1,7),sep=''))
rank_median <- data.frame(segment=paste('seg',seq(1,7),sep=''))
####
for (i in colnames(myData)[len]){
temp <- tibble::rownames_to_column(summary_mean, "segment")
temp <- temp%>%arrange(desc(!!parse_quosure(i)))
temp_2 <- tibble::rownames_to_column(summary_median, "segment")
temp_2 <- temp%>%arrange(desc(!!parse_quosure(i)))
for(j in temp$segment){
  rank_mean[rank_mean$segment==j,i] <- which((temp$segment)==j)
  rank_median[rank_median$segment==j,i] <- which((temp_2$segment)==j)
}}
####


write.csv(summary_mean,file='summary_mean.csv',row.names = TRUE)
write.csv(summary_mean,file='summary_median.csv',row.names = TRUE)
write.csv(rank_mean,file='rank_mean.csv',row.names = TRUE)
write.csv(rank_median,file='rank_median.csv',row.names = TRUE)


#assign(paste("seg",1,sep=""),mean)

Categorical Management

idx <- colnames(summary_mean)[c(seq(16,20),seq(41,45))]
category_management <- tibble::rownames_to_column(summary_mean[idx], "segment")
category_management$segment <- as.factor(category_management$segment)
##change the segment's name, if you already have
levels(category_management$segment) <- c('Summer Fling','Mad Christmas','Daddy Daycare','Hood Rich','Coal for Christmas','Personal Shopper','Be My Valentine')
title <- paste(c('MEN','WOMEN','KIDS','ATHLETIC','ACCESSORIES'),"department categorical management",sep=" ")

#####
for (i in seq(2,length(title)+1)){
idx_x=colnames(category_management)[i]
idx_y=colnames(category_management)[i+length(title)]

x=unlist(category_management[,idx_x])
y=unlist(category_management[,idx_y])*100
x_left_angle=min(x)-2
x_right_angle=max(x)+2
y_bot_angle=min(y)-2
y_top_angle=max(y)+2

print(
ggplot(category_management,aes(x,y,col=category_management$segment))+
  geom_point(size=5, alpha=0.8)+
  annotate("label", fill="grey", x =x_left_angle , y = y_bot_angle, label = "            Convenience", alpha = 0.3)+
  annotate("label", fill="grey", x =x_left_angle , y = y_top_angle, label = "     Occasion", alpha = 0.3)+
  annotate("label", fill="grey", x =x_right_angle , y = y_top_angle, label = "Destination         ", alpha = 0.3)+
  annotate("label", fill="grey", x =x_right_angle , y = y_bot_angle, label = "Routine    ", alpha = 0.3)+
  geom_hline(yintercept = (max(y)+min(y))/2, col='firebrick3',size=1)+
  geom_vline(xintercept = (max(x)+min(x))/2, col='firebrick3',size=1)+
  labs(x="average number of item purchase",y="% of customers in segment purchase", title=title[i-1])+
  theme(legend.title = element_blank())
)
}

Total_boxplot

mynamestheme <- theme(plot.title = element_text(family = "Helvetica", face = "bold", size = (15)), 
                  axis.title = element_text(family = "Helvetica", size = (5), colour = "steelblue4"),
                  axis.text = element_text(family = "Courier", colour = "cornflowerblue", size = (15)),
                  legend.position = "none")


boxplot_var <- c("spend_per_txn","spend_per_item","total_spend",  "mens_dept_spend", "womens_dept_spend","kids_dept_spend","athletic_dept_spend","accessories_spend","response", "total_txns",   "total_items",  "unique_sizes", "unique_depts", "internet_spend", "tenure","retained_spend","cmpns","pct_response", "opens", "clicks","hhincome","hhage","hhwom","hhmen","hhkids")


for (i in boxplot_var){
  idx=myData[,i]!=0
  temp=myData[idx,c("segment",i)]
  x=as.factor(unlist(temp[1]))
  y=unlist(temp[2])
  
   print(ggplot(temp,aes(x,y,col=x,na.rm = TRUE))+
     geom_boxplot(na.rm = TRUE)+labs(x='segment', title=i, y="")+mynamestheme)
 }